"""
Defeating a CSS-offset anti-scraping technique.
Target page: http://www.porters.vip/confusion/flight.html
Scraped fields: flight name, airline name, ticket price.

Markup of the first flight; displayed ticket price: 467
<span class="prc_wp" style="width:48px">
    <em class="rel">
        <b style="width:48px;left:-48px;">
            <i style="width: 16px;">7</i>
            <i style="width: 16px;">7</i>
            <i style="width: 16px;">7</i>
        </b>
        <b style="width: 16px;left:-32px">6</b>
        <b style="width: 16px;left:-48px">4</b>
    </em>
</span>

Markup of the second flight; displayed ticket price: 705
<span class="prc_wp" style="width:64px">
    <em class="rel">
        <b style="width:64px;left:-64px;">
            <i style="width: 16px;">8</i>
            <i style="width: 16px;">3</i>
            <i style="width: 16px;">9</i>
            <i style="width: 16px;">5</i>
        </b>
        <b style="width: 16px;left:-32px;">0</b>
        <b style="width: 16px;left:-48px">7</b>
        <b style="width: 16px;left:-16px">5</b>
    </em>
</span>
"""

import json
import re
import requests
from lxml import etree


def deal_price_ele(eles):
    """Extract the CSS left offset and the text of each price tag.

    Args:
        eles: Iterable of elements exposing ``xpath()``. Each one must yield
            at least one ``style`` attribute and one text node; otherwise the
            ``[0]`` lookup raises ``IndexError``.

    Returns:
        A list with one dict per element. ``'position'`` is the joined regex
        capture taken between ``left:`` and ``px`` in the style value (an
        empty string when the style has no such part), kept as a string.
        ``'value'`` is the first text node found on the element.
    """
    left_pattern = re.compile(r'.*left:(.*?)px.*')
    return [
        {
            # First style attribute found on the tag itself or below it.
            'position': ''.join(left_pattern.findall(tag.xpath('.//@style')[0])),
            # First text node of the tag, i.e. the digit it displays.
            'value': tag.xpath('.//text()')[0],
        }
        for tag in eles
    ]


# Script body: download the flight page, undo the CSS-offset obfuscation of
# each ticket price, and print every flight as JSON.
url = 'http://www.porters.vip/confusion/flight.html'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/92.0.4515.159 Safari/537.36 '
}
# Width in pixels of one digit slot, as used by the sample markup in the
# module docstring (every <i> and overriding <b> is 16px wide).
DIGIT_WIDTH_PX = 16

# Without a timeout requests may wait forever on an unresponsive server, and
# without raise_for_status() an HTTP error page would be parsed as flight data.
res = requests.get(url, headers=headers, timeout=10)
res.raise_for_status()
parser = etree.HTMLParser(encoding='utf-8')
# Pass the raw bytes so the parser's declared UTF-8 encoding is what decodes
# the page, rather than text decoded with the charset requests guessed.
html = etree.HTML(res.content, parser=parser)

flight_info = []
flights = html.xpath('//*[contains(@class, "e-airfly")]')  # one node per flight
for element in flights:
    air_name = element.xpath('.//*[contains(@class, "air")]//span/text()')[0].strip()  # flight name
    from_addr = element.xpath('.//*[contains(@class, "sep-lf")]//h2//text()')[0].strip()  # origin
    to_addr = element.xpath('.//*[contains(@class, "sep-rt")]//h2//text()')[0].strip()  # destination
    # All <b> tags that make up the obfuscated price.
    price_ele = element.xpath('.//*[contains(@class, "fix_price")]//*[contains(@class, "rel")]//b')
    b1 = price_ele.pop(0)  # the first <b> carries the base digits in <i> children
    base_price = b1.xpath('.//i//text()')  # base digits, one list entry per <i>
    alternate_price = deal_price_ele(price_ele)  # remaining <b> tags: digit + left offset
    for item in alternate_price:
        position = int(item.get('position'))  # left offset in pixels
        value = item.get('value')  # digit that is drawn over the base digit
        # Convert the pixel offset into a slot index. In the sample markup the
        # offsets are negative, so the index is negative and addresses
        # base_price from its right-hand end.
        index = int(position / DIGIT_WIDTH_PX)
        base_price[index] = value
    # Record the flight with its decoded price.
    flight_info.append({'air_name': air_name, 'from_addr': from_addr, 'to_addr': to_addr, 'price': ''.join(base_price)})

print(json.dumps(flight_info, ensure_ascii=False, indent=4))
